################################################################################ 
#
# The distribution of hate speech and its implications for content moderation
# PSRM - Replication package
# Table E7
#
################################################################################ 


################################################################################ 
#  LIBRARIES
################################################################################ 


library(hdm)
library(dplyr)
library(readr)

# Clear the workspace, keeping only objects whose name starts with "wd" and the
# object named exactly "setsave". The helper vector is not in the keep-list, so
# it is removed by the same call.
keep_names <- ls(pattern = "^wd|^setsave$")
rm(list = setdiff(ls(), keep_names))

# Fix the RNG state
set.seed(42)


################################################################################ 
#   DATA AND FOLDER
################################################################################ 

# Analysis sample (CSV) and the covariate set later passed to the lasso (RDS)
data <- read.csv(paste0(wd_data_processed, '/for_analysis/clean_for_analysis.csv'))
lassodata <- read_rds(paste0(wd_data_processed, '/for_analysis/data_lasso.RDS'))

# drop attrited users: keep rows where all four status flags equal 0.
# which() is used on purpose so rows with an NA flag are dropped as well
# instead of being returned as all-NA rows.
still_in_sample <- data$attrition == 0 &
  data$not_auth_sofar == 0 &
  data$suspended_sofar == 0 &
  data$protected_sofar == 0
data <- data[which(still_in_sample), ]


################################################################################ 
#   Prepare
################################################################################ 

# Outcome columns (names) paired with the labels used in the output table
outcome_labels <- c(
  anchor_deleted_12h     = 'Xenophobic Tweet Deleted <12h',
  no_hate_tweets_post_85 = '# Hate Tweets',
  hate_rate_85           = 'Share of daily hate',
  post_tweets_h_avg      = 'Probability of Hate Speech'
)
outvars <- names(outcome_labels)
outvars_name <- unname(outcome_labels)

# Treatment arms as coded in data$treat_label, paired with their labels
treatment_labels <- c(
  empathy_get  = 'Perspective Getting',
  empathy_take = 'Perspective Taking',
  consequences = 'Warning of Consequences',
  ban          = 'Ban',
  humor        = 'Humor'
)
treatments <- names(treatment_labels)
treatments_lab <- unname(treatment_labels)

# Scale data: centre and scale every outcome column and every lasso covariate
# (base::scale defaults), computed on the full retained sample.
data[outvars] <- lapply(data[outvars], scale)
lassodata <- as.data.frame(lapply(lassodata, scale))


################################################################################ 
#   Run the models
################################################################################ 


# Fit one double-selection lasso model per (outcome, treatment arm) pair.
# Models are stored outcome-major (all arms for outvars[1], then outvars[2],
# ...), which is the order the results table built further down assumes.
# Each model is fitted exactly once.

# NOTE(review): `lassodata` is indexed below with row positions computed on the
# filtered `data`. This presumes both objects contain the same rows in the same
# order -- confirm against the script that builds data_lasso.RDS.
if (nrow(lassodata) != nrow(data)) {
  warning(
    "lassodata has ", nrow(lassodata), " rows but data has ", nrow(data),
    "; row positions may not refer to the same users",
    call. = FALSE
  )
}

MODELS <- vector("list", length(outvars) * length(treatments))

# Logical flag per covariate column, passed as I3: columns 2 and 3 are always
# kept in the final regression regardless of the lasso selection.
always_in <- rep(FALSE, ncol(lassodata))
always_in[c(2, 3)] <- TRUE

n <- 1

for (j in seq_along(outvars)) {
  for (i in seq_along(treatments)) {

    # Rows whose outcome is missing or +Inf are excluded
    bad_rows <- union(which(is.na(data[outvars[j]])),
                      which(data[outvars[j]] == Inf))

    # Estimation sample: the current treatment arm plus the control group
    keeper <- union(which(data$treat_label == treatments[i]),
                    which(data$treat_label == 'control'))
    keeper <- setdiff(keeper, bad_rows)

    y <- data[keeper, outvars[j]]
    d <- data$treated[keeper]
    x <- as.matrix(lassodata[keeper, ])

    print('Data ready')

    MODELS[[n]] <- rlassoEffect(x, y, d, method = "double selection",
                                I3 = always_in)

    print('Done with: ')
    print(n)

    n <- n + 1
  }
}



################################################################################ 
#   Clean the model outcomes
################################################################################ 


MOD <- MODELS

# Pull the pieces needed from each fit once: the coefficient table (first
# element of the summary object) and the coefficients of the final regression.
coef_tables <- lapply(MOD, function(fit) summary(fit)[[1]])
reg_coefs <- lapply(MOD, function(fit) fit$coefficients.reg)

# Entries 1, 2 and 4 of the coefficient table are kept as estimate, standard
# error and p-value; entry 3 is not used.
coef <- sapply(coef_tables, `[`, 1)
se <- sapply(coef_tables, `[`, 2)
pval <- sapply(coef_tables, `[`, 4)

# The first two final-regression coefficients are excluded from both the count
# and the name list; the remaining ones are reported as the controls.
regnum <- sapply(reg_coefs, function(b) length(b) - 2)
sample <- sapply(MOD, function(fit) fit$samplesize)
controls <- sapply(reg_coefs, function(b) paste0(names(b)[-c(1, 2)], collapse = ', '))

# One row per model, in the same outcome-major order in which MODELS was filled
n_treat <- length(treatments)
n_out <- length(outvars)

container <- data.frame(
  yvar = rep(outvars, each = n_treat),
  yvar_name = rep(outvars_name, each = n_treat),
  treat = rep(treatments, times = n_out),
  treatments_lab = rep(treatments_lab, times = n_out),
  coef = coef,
  se = se,
  pval = pval,
  samplesize = sample,
  lasso_cont_num = regnum,
  controls = controls,
  check.names = FALSE
)


################################################################################ 
#   Adjust p-values
################################################################################ 


# Benjamini-Hochberg adjustment of p-values, applied separately within each
# treatment (i.e. across the outcomes of one treatment arm).
#
# Args:
#   data: data frame with columns `treatments_lab`, `yvar_name` and `pval`.
#
# Returns:
#   A data frame with columns `treatments_lab`, `yvar_name`, `pvals` (raw) and
#   `pvals_fdr` (BH-adjusted). Rows are grouped by treatment in order of first
#   appearance; within a treatment the input row order is kept. An empty data
#   frame is returned when `data` has no rows.
#
# Despite its name the function writes nothing to disk; it emits one progress
# message per treatment.
save_bh_adj <- function(data) {
  labels <- as.character(unique(data$treatments_lab))

  adjust_one <- function(lab) {
    message(paste0('Pvals for ', lab))
    rows <- data[data$treatments_lab == lab, ]

    pvals <- rows$pval
    # n is passed explicitly so that the number of comparisons is the number
    # of rows of this treatment, including any rows with a missing p-value.
    pvals_fdr <- p.adjust(pvals, method = "BH", n = length(pvals))

    data.frame(
      treatments_lab = rep(lab, length(pvals)),
      yvar_name = rows$yvar_name,
      pvals = as.numeric(pvals),
      pvals_fdr = as.numeric(pvals_fdr)
    )
  }

  pieces <- lapply(labels, adjust_one)
  if (length(pieces) == 0) {
    return(data.frame())
  }

  # Bind once instead of growing the result inside the loop
  do.call(rbind, pieces)
}

# BH-adjusted p-values per treatment, merged back onto the results table by
# treatment label and outcome label
container_bh <- save_bh_adj(data = container)
fdr_columns <- container_bh[, c("treatments_lab", "yvar_name", "pvals_fdr")]
container <- left_join(container, fdr_columns,
                       by = c("treatments_lab", "yvar_name"))

# Write the table without a row-name column
write.csv(container, paste0(wd_res, '/tables/tabE7.csv'), row.names = FALSE)

# Console banner confirming the table was written
cat("\n====================\n", "Saved Table E7", "\n====================\n",
    sep = "")

